# Install pacman on first use, then let it install/attach the packages used
# below. p_load() is called through the pacman:: namespace so the script also
# works immediately after a fresh install, when pacman has been installed but
# not attached to the search path.
# sjPlot is called further down as sjPlot::view_df() and is not loaded here,
# so it has to be installed separately.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")

pacman::p_load(rio, here, tidyverse, skimr, panelr)

# ---- Wave 1 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w1.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w1.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

# bg1 <- bg1 %>% rename_with(~paste0(.,"_1"), -c(1)) # add a wave indicator

export(bg1, here("data", "working data", "liss netherlands", "bgwave1.csv"))

# Keep the id and the selected items; the vote item is renamed while it is
# selected, and the "cv08a" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv08a001, cv08a012, # satisfaction, political interest
    cv08a013, cv08a014, cv08a015, cv08a016, cv08a017, cv08a018, cv08a019, # confidence in...
    cv08a020, cv08a021, cv08a022, cv08a023, cv08a024, cv08a025, cv08a026, cv08a027,
    cv08a030, cv08a031, cv08a034, cv08a035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv08a054, # recalled national vote
    cv08a101, # self-placed LR
    cv08a150, cv08a103, # differences in income
    cv08a161, cv08a163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv08a", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave1.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_1" is appended to every
# column name except the first (the id).
wave1 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 1") %>%
  rename_with(function(nm) paste0(nm, "_1"), .cols = -1)

export(wave1, here("data", "working data", "liss netherlands", "merged", "wave1.csv"))

# ---- Wave 2 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w2.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w2.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave2.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv09b" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv09b001, cv09b012, # satisfaction, political interest
    cv09b013, cv09b014, cv09b015, cv09b016, cv09b017, cv09b018, cv09b019, # confidence in...
    cv09b020, cv09b021, cv09b022, cv09b023, cv09b024, cv09b025, cv09b026, cv09b027,
    cv09b030, cv09b031, cv09b034, cv09b035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv09b054, prospvote = cv09b058, # recalled national vote, prospective nat vote
    cv09b101, # self-placed LR
    cv09b150, cv09b103, # differences in income
    cv09b161, cv09b163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv09b", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave2.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_2" is appended to every
# column name except the first (the id).
wave2 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 2") %>%
  rename_with(function(nm) paste0(nm, "_2"), .cols = -1)

export(wave2, here("data", "working data", "liss netherlands", "merged", "wave2.csv"))

# ---- Wave 3 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w3.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w3.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave3.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv10c" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv10c001, cv10c012, # satisfaction, political interest
    cv10c013, cv10c014, cv10c015, cv10c016, cv10c017, cv10c018, cv10c019, # confidence in...
    cv10c020, cv10c021, cv10c022, cv10c023, cv10c024, cv10c025, cv10c026, cv10c027,
    cv10c030, cv10c031, cv10c034, cv10c035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv10c054, prospvote = cv10c058, # recalled national vote, prospective nat vote
    cv10c101, # self-placed LR
    cv10c150, cv10c103, # differences in income
    cv10c161, cv10c163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv10c", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave3.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_3" is appended to every
# column name except the first (the id).
wave3 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 3") %>%
  rename_with(function(nm) paste0(nm, "_3"), .cols = -1)

export(wave3, here("data", "working data", "liss netherlands", "merged", "wave3.csv"))

# ---- Wave 4 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w4.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w4.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave4.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv11d" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv11d001, cv11d012, # satisfaction, political interest
    cv11d013, cv11d014, cv11d015, cv11d016, cv11d017, cv11d018, cv11d019, # confidence in...
    cv11d020, cv11d021, cv11d022, cv11d023, cv11d024, cv11d025, cv11d026, cv11d027,
    cv11d030, cv11d031, cv11d034, cv11d035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv11d169, prospvote = cv11d171, # recalled national vote, prospective nat vote
    cv11d101, # self-placed LR
    cv11d103, # differences in income
    cv11d161, cv11d163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv11d", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave4.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_4" is appended to every
# column name except the first (the id).
wave4 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 4") %>%
  rename_with(function(nm) paste0(nm, "_4"), .cols = -1)

export(wave4, here("data", "working data", "liss netherlands", "merged", "wave4.csv"))

# ---- Wave 5 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w5.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w5.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave5.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv12e" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv12e001, cv12e012, # satisfaction, political interest
    cv12e013, cv12e014, cv12e015, cv12e016, cv12e017, cv12e018, cv12e019, # confidence in...
    cv12e020, cv12e021, cv12e022, cv12e023, cv12e024, cv12e025, cv12e026, cv12e027,
    cv12e030, cv12e031, cv12e034, cv12e035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv12e169, prospvote = cv12e171, # recalled national vote, prospective nat vote
    cv12e101, # self-placed LR
    cv12e103, # differences in income
    cv12e161, cv12e163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv12e", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave5.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_5" is appended to every
# column name except the first (the id).
wave5 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 5") %>%
  rename_with(function(nm) paste0(nm, "_5"), .cols = -1)

export(wave5, here("data", "working data", "liss netherlands", "merged", "wave5.csv"))


# ---- wave 6 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w6.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w6.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave6.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv13f" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv13f001, cv13f012, # satisfaction, political interest
    cv13f013, cv13f014, cv13f015, cv13f016, cv13f017, cv13f018, cv13f019, # confidence in...
    cv13f020, cv13f021, cv13f022, cv13f023, cv13f024, cv13f025, cv13f026, cv13f027,
    cv13f030, cv13f031, cv13f034, cv13f035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv13f207, prospvote = cv13f209, # recalled national vote, prospective nat vote
    cv13f101, # self-placed LR
    cv13f103, # differences in income
    cv13f161, cv13f163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv13f", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave6.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_6" is appended to every
# column name except the first (the id).
wave6 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 6") %>%
  rename_with(function(nm) paste0(nm, "_6"), .cols = -1)

export(wave6, here("data", "working data", "liss netherlands", "merged", "wave6.csv"))

# ---- wave 7 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w7.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w7.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave7.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv14g" prefix is then stripped from the column names.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv14g001, cv14g012, # satisfaction, political interest
    cv14g013, cv14g014, cv14g015, cv14g016, cv14g017, cv14g018, cv14g019, # confidence in...
    cv14g020, cv14g021, cv14g022, cv14g023, cv14g024, cv14g025, cv14g026, cv14g027,
    cv14g030, cv14g031, cv14g034, cv14g035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv14g207, prospvote = cv14g209, # recalled national vote, prospective nat vote
    cv14g101, # self-placed LR
    cv14g103, # differences in income
    cv14g161, cv14g163 # start/end dates
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv14g", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave7.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_7" is appended to every
# column name except the first (the id).
wave7 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 7") %>%
  rename_with(function(nm) paste0(nm, "_7"), .cols = -1)

export(wave7, here("data", "working data", "liss netherlands", "merged", "wave7.csv"))

# ---- wave 8 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w8.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w8.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave8.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv16h" prefix is then stripped from the column names.
# No start/end date columns are selected for this wave.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv16h001, cv16h012, # satisfaction, political interest
    cv16h013, cv16h014, cv16h015, cv16h016, cv16h017, cv16h018, cv16h019, # confidence in...
    cv16h020, cv16h021, cv16h022, cv16h023, cv16h024, cv16h025, cv16h026, cv16h027,
    cv16h030, cv16h031, cv16h034, cv16h035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv16h207, prospvote = cv16h209, # recalled national vote, prospective nat vote
    cv16h101, # self-placed LR
    cv16h103 # differences in income
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv16h", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave8.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_8" is appended to every
# column name except the first (the id).
wave8 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 8") %>%
  rename_with(function(nm) paste0(nm, "_8"), .cols = -1)

export(wave8, here("data", "working data", "liss netherlands", "merged", "wave8.csv"))

# ---- wave 9 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w9.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w9.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave9.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv17i" prefix is then stripped from the column names.
# No start/end date columns are selected for this wave.
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv17i001, cv17i012, # satisfaction, political interest
    cv17i013, cv17i014, cv17i015, cv17i016, cv17i017, cv17i018, cv17i019, # confidence in...
    cv17i020, cv17i021, cv17i022, cv17i023, cv17i024, cv17i025, cv17i026, cv17i027,
    cv17i030, cv17i031, cv17i034, cv17i035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv17i207, prospvote = cv17i244, # recalled national vote, prospective nat vote
    cv17i101, # self-placed LR
    cv17i103 # differences in income
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv17i", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave9.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_9" is appended to every
# column name except the first (the id).
wave9 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 9") %>%
  rename_with(function(nm) paste0(nm, "_9"), .cols = -1)

export(wave9, here("data", "working data", "liss netherlands", "merged", "wave9.csv"))

# ---- Wave 10 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w10.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w10.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave10.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv18j" prefix is then stripped from the column names
# (so cv18j_m1 becomes "_m1", and so on).
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv18j001, cv18j012, # satisfaction, political interest
    cv18j013, cv18j014, cv18j015, cv18j016, cv18j017, cv18j018, cv18j019, # confidence in...
    cv18j020, cv18j021, cv18j022, cv18j023, cv18j024, cv18j025, cv18j026, cv18j027,
    cv18j030, cv18j031, cv18j034, cv18j035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv18j307, prospvote = cv18j308, # recalled national vote, prospective nat vote
    cv18j101, # self-placed LR
    cv18j103, # differences in income
    cv18j_m1, cv18j_m2, cv18j_m3 # fieldwork periods
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv18j", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave10.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_10" is appended to
# every column name except the first (the id).
wave10 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 10") %>%
  rename_with(function(nm) paste0(nm, "_10"), .cols = -1)

export(wave10, here("data", "working data", "liss netherlands", "merged", "wave10.csv"))


# ---- Wave 11 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w11.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w11.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave11.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv19k" prefix is then stripped from the column names
# (so cv19k_m1 becomes "_m1", and so on).
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv19k001, cv19k012, # satisfaction, political interest
    cv19k013, cv19k014, cv19k015, cv19k016, cv19k017, cv19k018, cv19k019, # confidence in...
    cv19k020, cv19k021, cv19k022, cv19k023, cv19k024, cv19k025, cv19k026, cv19k027,
    cv19k030, cv19k031, cv19k034, cv19k035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv19k307, prospvote = cv19k308, # recalled national vote, prospective nat vote
    cv19k101, # self-placed LR
    cv19k103, # differences in income
    cv19k_m1, cv19k_m2, cv19k_m3 # fieldwork periods
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv19k", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave11.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_11" is appended to
# every column name except the first (the id).
wave11 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 11") %>%
  rename_with(function(nm) paste0(nm, "_11"), .cols = -1)

export(wave11, here("data", "working data", "liss netherlands", "merged", "wave11.csv"))

# ---- Wave 12 ----

df1 <- import(here("data", "raw download", "liss netherlands", "liss_pv_w12.dta"))

# Background file: keep the person/household ids and the background columns,
# then save that subset on its own.
bg1 <- import(here("data", "raw download", "liss netherlands", "background_w12.dta")) %>%
  select(
    nomem_encr, nohouse_encr,
    geslacht, gebjaar, leeftijd, burgstat, belbezig, brutoink, oplcat
  )

export(bg1, here("data", "working data", "liss netherlands", "bgwave12.csv"))

# Keep the id and the selected items; the two vote items are renamed while they
# are selected, and the "cv20l" prefix is then stripped from the column names
# (so cv20l_m1 becomes "_m1", and so on).
df1 <- df1 %>%
  select(
    nomem_encr, # ids
    cv20l001, cv20l012, # satisfaction, political interest
    cv20l013, cv20l014, cv20l015, cv20l016, cv20l017, cv20l018, cv20l019, # confidence in...
    cv20l020, cv20l021, cv20l022, cv20l023, cv20l024, cv20l025, cv20l026, cv20l027,
    cv20l030, cv20l031, cv20l034, cv20l035, # satisfaction w/ dutch govt, parliament, politicians, parties.
    vote = cv20l307, prospvote = cv20l308, # recalled national vote, prospective nat vote
    cv20l101, # self-placed LR
    cv20l103, # differences in income
    cv20l_m1, cv20l_m2, cv20l_m3 # fieldwork periods
  ) %>%
  rename_with(function(nm) gsub(pattern = "cv20l", replacement = "", x = nm))

export(df1, here("data", "working data", "liss netherlands", "polvarwave12.csv"))

# Left join: every row of df1 is kept and background columns are attached by
# respondent id. A constant wave label is added, then "_12" is appended to
# every column name except the first (the id).
wave12 <- df1 %>%
  left_join(bg1, by = "nomem_encr") %>%
  mutate(wave = "Wave 12") %>%
  rename_with(function(nm) paste0(nm, "_12"), .cols = -1)

export(wave12, here("data", "working data", "liss netherlands", "merged", "wave12.csv"))


# ---- merge waves ----

# Fold the twelve per-wave tables into one wide table, full-joining them one
# after another on the respondent id so that ids present in any wave are kept.
liss <- reduce(
  list(
    wave1, wave2, wave3, wave4, wave5, wave6,
    wave7, wave8, wave9, wave10, wave11, wave12
  ),
  full_join,
  by = "nomem_encr"
)

# this could surely be an elegant function
# 
# liss <- liss %>%
#   mutate(t = case_when(wave_1 == "Wave 1" ~ 1,
#                        wave_2 == "Wave 2" ~ 2,
#                        wave_3 == "Wave 3" ~ 3,
#                        wave_4 == "Wave 4" ~ 4,
#                        wave_5 == "Wave 5" ~ 5,
#                        wave_6 == "Wave 6" ~ 6,
#                        wave_7 == "Wave 7" ~ 7,
#                        wave_8 == "Wave 8" ~ 8,
#                        wave_9 == "Wave 9" ~ 9,
#                        wave_10 == "Wave 10" ~ 10,
#                        wave_11 == "Wave 11" ~ 11,
#                        wave_12 == "Wave 12" ~ 12))


#skim(liss)

# Test on just 50 cols 
# liss_pan <- liss[1:50]
# liss_long <- long_panel(liss_pan, prefix = "_", begin = 1, end = 2, id = "nomem_encr", label_location = "end", wave = "time")
# table(liss_long$`001`)
# 
# liss_long %>% 
#   group_by(time) %>%
#   filter(nomem_encr == "800045") %>%
#   summarize(mean(`001`, na.rm=T))
# 
# n_distinct(liss_long$nomem_encr)

# Reshape the wide table to long format with panelr::long_panel(): the wave
# number is read from the "_<n>" suffix at the end of each column name
# (waves 1 to 12) and stored in a column called "time".
liss_long <- long_panel(
  data = liss,
  id = "nomem_encr",
  wave = "time",
  prefix = "_",
  label_location = "end",
  begin = 1,
  end = 12
)

# Save both the wide (merged) and the long (reshaped) versions.
export(liss, here("data", "working data", "liss netherlands", "merged", "liss_merged.csv"))
export(liss_long, here("data", "working data", "liss netherlands", "merged", "liss_reshaped.csv"))

# liss_long <- import(here("data", "working data", "liss netherlands", "merged", "liss_reshaped.csv"))

#liss <- import(here("data", "working data", "liss netherlands", "merged", "liss_merged.csv"))

# ---- Checks ----

# Number of distinct respondent ids in the long data.
n_distinct(liss_long$nomem_encr) # correct number of individuals
#14896

# Wave-12 rows of the long data, to compare against the wide wave-12 column.
w12 <- filter(liss_long, time == 12)

# The two frequency tables below should match (item 019, wave 12).
table(liss$`019_12`)
# -9    0    1    2    3    4    5    6    7    8    9   10 
# 243  457  291  369  435  560  916 1031  794  303   57   11 
table(w12$`019`) 
# -9    0    1    2    3    4    5    6    7    8    9   10 
# 243  457  291  369  435  560  916 1031  794  303   57   11 

str(liss_long)

# Collect each column's "label" attribute, then transpose the result.
codebook <- liss_long %>%
  summarize_all(function(col) attr(col, "label")) %>%
  t()

sjPlot::view_df(liss_long)


# ---- Cleaning ----

# want to punch out: 99, 999, -9 (DKs), 998 (prefer not to say), "blank", -13, -15

# Codes that are turned into NA in every column.
# NOTE(review): this is applied to all columns, as before, so a genuine value
# of 99/998/999 (e.g. in the age or income columns) is blanked too - confirm
# that this is intended.
missing_codes <- c(99, 999, 998, -13, -15, -9)

# na_if() can no longer be called on a whole data frame with a scalar `y`
# (dplyr >= 1.1.0 casts `y` to the type of `x` and errors), so the recode is
# done column by column instead. If liss_long is grouped, across() leaves the
# grouping columns untouched.
liss_long <- liss_long %>%
  mutate(across(everything(), function(col) {
    col[col %in% missing_codes] <- NA
    col
  }))

# rename the variables

# Lookup table in the form new_name = "current column name". all_of() makes
# rename() fail if any of the current names is missing from liss_long.
new_names <- c(
  id = "nomem_encr",
  satgov = "001",
  polint = "012",
  confgov = "013",
  confparl = "014",
  conflegal = "015",
  confpol = "016",
  confpolt = "017",
  confpolpart = "018",
  confep = "019",
  confun = "020",
  confmed = "021",
  confmilit = "022",
  confeduc = "023",
  confhealth = "024",
  confsci = "025",
  confecon = "026",
  confdemoc = "027",
  satisfgov = "030",
  satisfparl = "031",
  satisfpol = "034",
  satisfpart = "035",
  leftright = "101",
  redistclass = "150",
  reddistincome = "103",
  start = "161",
  end = "163",
  houseno = "nohouse_encr",
  gender = "geslacht",
  yob = "gebjaar",
  age = "leeftijd",
  civilstat = "burgstat",
  occup = "belbezig",
  income = "brutoink",
  educ = "oplcat",
  startdatev1 = "_m1",
  startdatev2 = "_m2",
  startdatev3 = "_m3"
)

liss_long <- liss_long %>%
  rename(all_of(new_names))

sjPlot::view_df(liss_long)

# Derived variables:
#   workstatus - occup collapsed into four codes (1-3 -> 1, 7 -> 2, 9 -> 3,
#                4/5/6/8/10-14 -> 4); any other value, including NA, stays NA
#   edu3       - educ collapsed into three codes (1 -> 1, 2-5 -> 2, 6 -> 3)
#   income10   - income split into 10 ranked buckets with ntile()
# NOTE(review): if liss_long is still a grouped panel object at this point,
# ntile() would be computed within each respondent rather than over the whole
# sample - confirm which is intended.
liss_long <- liss_long %>%
  mutate(
    workstatus = case_when(
      occup %in% 1:3 ~ 1,
      occup == 7 ~ 2,
      occup == 9 ~ 3,
      occup %in% c(4, 5, 6, 8, 10:14) ~ 4
    ),
    edu3 = case_when(
      educ == 1 ~ 1,
      educ %in% 2:5 ~ 2,
      educ == 6 ~ 3
    ),
    income10 = ntile(income, 10)
  )

# Reorder the ones that need it

# Turn the wave label into an ordered factor whose levels run from "Wave 1" to
# "Wave 12" in numeric order.
wave_levels <- paste("Wave", 1:12)

liss_long <- liss_long %>%
  mutate(wave = factor(wave, levels = wave_levels, ordered = TRUE))

sjPlot::view_df(liss_long)

# Count, per respondent, the rows whose wave label is missing. The test uses
# is.na(): comparing against the string "NA" never matches a real missing
# value, so the earlier form of this check could not detect anything.
liss_long %>% filter(is.na(wave)) %>% count(id) # should be null 

# Collect each column's "label" attribute from the wide table, then transpose.
codebook_liss <- liss %>% 
  summarize_all(function(x) attr(x, "label"))
codebook_liss <- t(codebook_liss)

# Remove rows where confpolt is NA (explicit is.na() test rather than a
# comparison with the string "NA").
liss_long <- liss_long %>% filter(!is.na(confpolt))

# Reverse political interest: |polint - 4| maps 1 -> 3, 2 -> 2, 3 -> 1.
liss_long <- liss_long %>%
  mutate(polint = abs(polint - 4))


### Creating month 

# NOTE(review): `test1` is not created anywhere in the visible part of this
# script - confirm where it comes from (possibly it should be liss_long).

# "wave" is overwritten with the numeric wave index held in "time".
test1$wave <- test1$time

# Rows belonging to waves after wave 7 get an extra year / 12 months below.
after_wave7 <- test1$wave > 7

# year = wave + 2007, plus 1 more for waves after wave 7.
test1$year <- test1$wave + 2007
test1$year[after_wave7] <- test1$year[after_wave7] + 1

# month = (wave - 1) * 12 + 1, plus 12 more for waves after wave 7.
test1$month <- (test1$wave - 1) * 12 + 1
test1$month[after_wave7] <- test1$month[after_wave7] + 12

### Creating the incumbency variable

# NOTE(review): `test1` is not created anywhere in the visible part of this
# script - confirm where it comes from (possibly it should be liss_long).

# Vote codes -8, 14 and 15 are treated as missing.
excluded_votes <- c(-8, 14, 15)
test1$vote[test1$vote %in% excluded_votes] <- NA

# incumbency starts as NA everywhere and is set to 0 wherever a vote is recorded.
has_vote <- !is.na(test1$vote)
test1$incumbency <- NA
test1$incumbency[has_vote] <- 0

# Within each range of years, the listed vote codes are flagged as 1.
yr <- test1$year
party <- test1$vote
test1$incumbency[yr < 2011 & (party %in% c(1, 8))] <- 1
test1$incumbency[yr > 2010 & yr < 2013 & (party %in% c(1, 3))] <- 1
test1$incumbency[yr > 2012 & yr < 2018 & (party %in% c(3, 2))] <- 1
test1$incumbency[yr > 2017 & yr < 2021 & (party %in% c(1, 3, 7, 8))] <- 1


